home *** CD-ROM | disk | FTP | other *** search
/ AmigActive 2 / AACD 2.iso / AACD / Magazine / GraphicsCards / StormMesa / src / shade_asmppc.p < prev    next >
Text File  |  1999-02-04  |  13KB  |  357 lines

  1. ;
  2. ; Mesa 3-D graphics library
  3. ; Version:  2.5
  4. ; Copyright (C) 1995-1997  Brian Paul
  5. ;
  6. ; This library is free software; you can redistribute it and/or
  7. ; modify it under the terms of the GNU Library General Public
  8. ; License as published by the Free Software Foundation; either
  9. ; version 2 of the License, or (at your option) any later version.
  10. ;
  11. ; This library is distributed in the hope that it will be useful,
  12. ; but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14. ; Library General Public License for more details.
  15. ;
  16. ; You should have received a copy of the GNU Library General Public
  17. ; License along with this library; if not, write to the Free
  18. ; Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19. ;
  20.  
  21. ;   shade_asmppc.p
  22. ;   9.1.1998 by Sam Jordan
  23. ;
  24. ;   PowerPC assembler optimizations of several functions in shade.c
  25. ;   Originally written for AMIGA OS/PowerOpen. To use this source on other
  26. ;   PowerPC based platforms or with other programming models, some
  27. ;   modifications might be needed.
  28.  
  29.                 include exec/types.i            ; presumably supplies the STRUCTURE/APTR/FLOAT/LABEL macros used below
  30.  
  31. USE_SUFFIX      =       1               ; nonzero: pow is referenced as _pow__r (StormC);
  32.                                         ; set to zero for any other compiler (plain _pow)
  33. SHINE_TABLE_SIZE        =       200     ; keep consistent with types.h
  34. ; Import the pow() entry point under the name selected by USE_SUFFIX.
  35.                 IFNE    USE_SUFFIX
  36.                 XREF    _pow__r
  37.                 ELSEIF
  38.                 XREF    _pow
  39.                 ENDC
  40. ; Layout of the block passed to _asm_shade_rgba_fast in r8 ("sh").
  41.                 STRUCTURE       SHADE_OPT,0
  42.                 APTR            H_INF_NORM      ; -> 3 floats, read as h_inf_norm[0..2]
  43.                 APTR            VP_INF_NORM     ; -> 3 floats, read as VP_inf_norm[0..2]
  44.                 APTR            MATDIFFUSE      ; -> 3 floats, read as MatDiffuse[side][0..2]
  45.                 APTR            MATSPECULAR     ; -> 3 floats, read as MatSpecular[side][0..2]
  46.                 FLOAT           SHININESS       ; Material[side].Shininess, used as pow() exponent
  47.                 APTR            SHINETABLE      ; -> ShineTable, an array of floats indexed by k
  48.                 LABEL           SHADE_OPT_SIZE
  49.  
  ; Note: these optimizations assume that the number of enabled lights
  ;  is less than two. The routine never walks a light list; the data of
  ;  the single light (if any) is taken from the SHADE_OPT block in r8.

                XDEF    _asm_shade_rgba_fast

;-----------------------------------------------------------------------
; _asm_shade_rgba_fast
;
; Writes one packed colour word per input normal. For every normal the
; colour starts from baseColor; if sh is non-NULL a diffuse term
; (n . VP_inf_norm) and a specular term (n . h_inf_norm, raised to
; Shininess via pow or the cached ShineTable) are added. The result is
; clamped to [0,1], scaled by 255 and stored as R<<24 | G<<16 | B<<8 | A.
;
; In:   r3 = n          number of normals / colours (0 = nothing to do)
;       r4 = side       0: use the normal as is, nonzero: negate it
;       r5 = normal     n * 3 floats
;       r6 = color      n 32-bit words, written
;       r7 = baseColor  4 floats (R, G, B, A)
;       r8 = sh         SHADE_OPT block (NULL if no lights are enabled)
;
; Stack: f14-f31 and r23-r31 are saved below the caller's stack pointer,
;        then a 320 byte frame is opened. LR is kept at 8(caller sp).
;        40/48/56(r1) hold f10-f12 across the pow calls.
;        The doubleword at -8(r1) (below the new stack pointer) is used
;        as scratch for float -> integer conversions between calls.
;
; Registers inside the loop:
;       r23 = sh                r26 -> ShineTable
;       r25 = k * 4             r27 = side
;       r28 = color - 4         r29 = normal - 4   (both pre-incremented)
;       r30 = loop counter      r31 = sumA
;       f10-f12 = baseColor R/G/B       f14-f16 = MatSpecular
;       f17-f19 = MatDiffuse            f20-f22 = h_inf_norm
;       f23-f25 = VP_inf_norm           f26 = 0.0, f27 = 1.0
;       f28 = Shininess                 f29-f31 = sumR/sumG/sumB
;       (r24 is saved and restored but not otherwise used.)
;-----------------------------------------------------------------------

_asm_shade_rgba_fast

; The loop below is bottom-tested, so it would run once and then wrap
; the counter for n == 0. The C reference loop "for (j=0;j<n;j++)" does
; nothing in that case, so return before touching any state.

                mr.     r3,r3
                beqlr

                stfd    f31,-8*2(r1)
                stfd    f30,-8*3(r1)
                stfd    f29,-8*4(r1)
                stfd    f28,-8*5(r1)
                stfd    f27,-8*6(r1)
                stfd    f26,-8*7(r1)
                stfd    f25,-8*8(r1)
                stfd    f24,-8*9(r1)
                mflr    r0
                stfd    f23,-8*10(r1)
                stfd    f22,-8*11(r1)
                stfd    f21,-8*12(r1)
                stfd    f20,-8*13(r1)
                stfd    f19,-8*14(r1)
                stfd    f18,-8*15(r1)
                stfd    f17,-8*16(r1)
                stfd    f16,-8*17(r1)
                stfd    f15,-8*18(r1)
                stfd    f14,-8*19(r1)
                stw     r0,8(r1)
                stw     r31,-8*19-1*4(r1)
                stw     r30,-8*19-2*4(r1)
                stw     r29,-8*19-3*4(r1)
                stw     r28,-8*19-4*4(r1)
                stw     r27,-8*19-5*4(r1)
                stw     r26,-8*19-6*4(r1)
                stw     r25,-8*19-7*4(r1)
                stw     r24,-8*19-8*4(r1)
                stw     r23,-8*19-9*4(r1)
                stwu    r1,-320(r1)

                lfs     f27,_f_1(r2)            ;f27 = 1.0
                fsubs   f26,f27,f27             ;f26 = 0.0

;   sumA = (GLint) (baseColor[3] * 255.0F);

                lfs     f3,_f_255(r2)
                lfs     f1,12(r7)
                fmuls   f1,f1,f3
                fctiwz  f1,f1                   ;integer lands in the low word
                stfd    f1,-8(r1)
                lwz     r31,-4(r1)              ;r31 = sumA
                mr      r30,r3                  ;r30 = loopcounter
                subi    r29,r5,4                ;r29 = normal (biased for lfsu)
                subi    r28,r6,4                ;r28 = color (biased for stwu)
                mr      r23,r8                  ;r23 = sh
                mr      r27,r4                  ;r27 = side
                lfs     f10,0(r7)               ;f10 = baseColor[0]
                lfs     f11,4(r7)               ;f11 = baseColor[1]
                lfs     f12,8(r7)               ;f12 = baseColor[2]

;   Load the per-light constants once, unless there is no light at all.

                mr.     r8,r8
                beq     .next
                lwz     r9,VP_INF_NORM(r8)
                lfs     f23,0(r9)               ;f23 = VP_inf_norm[0]
                lfs     f24,4(r9)               ;f24 = VP_inf_norm[1]
                lfs     f25,8(r9)               ;f25 = VP_inf_norm[2]
                lwz     r9,H_INF_NORM(r8)
                lfs     f20,0(r9)               ;f20 = h_inf_norm[0]
                lfs     f21,4(r9)               ;f21 = h_inf_norm[1]
                lfs     f22,8(r9)               ;f22 = h_inf_norm[2]
                lwz     r9,MATDIFFUSE(r8)
                lfs     f17,0(r9)               ;f17 = MatDiffuse[side][0]
                lfs     f18,4(r9)               ;f18 = MatDiffuse[side][1]
                lfs     f19,8(r9)               ;f19 = MatDiffuse[side][2]
                lwz     r9,MATSPECULAR(r8)
                lfs     f14,0(r9)               ;f14 = MatSpecular[side][0]
                lfs     f15,4(r9)               ;f15 = MatSpecular[side][1]
                lfs     f16,8(r9)               ;f16 = MatSpecular[side][2]
                lfs     f28,SHININESS(r8)       ;f28 = Material[side].Shininess
                lwz     r26,SHINETABLE(r8)      ;r26 -> ShineTable

;   for (j=0;j<n;j++) {

.next
.loop

;      /* the normal vector */
;      if (side==0) {
;         nx = normal[j][0];
;         ny = normal[j][1];
;         nz = normal[j][2];
;      }
;      else {
;         nx = -normal[j][0];
;         ny = -normal[j][1];
;         nz = -normal[j][2];
;      }

                mr.     r27,r27
                lfsu    f0,4(r29)               ;f0 = nx
                lfsu    f1,4(r29)               ;f1 = ny
                lfsu    f2,4(r29)               ;f2 = nz
                beq     .zero
                fneg    f0,f0
                fneg    f1,f1
                fneg    f2,f2
.zero

;      for (light=ctx->Light.FirstEnabled; light; light=light->NextEnabled) {
;      (at most one light: the test of sh below stands in for that loop)

                mr.     r23,r23                 ;cr0 is consumed by "beq .end"

;      sumR = baseColor[0];
;      sumG = baseColor[1];
;      sumB = baseColor[2];

;         n_dot_VP = nx * light->VP_inf_norm[0]
;                  + ny * light->VP_inf_norm[1]
;                  + nz * light->VP_inf_norm[2];
;
;      The dot product is started before the sh test is acted upon; with
;      sh == NULL its result is simply discarded.

                fmuls   f3,f23,f0
                fmr     f29,f10                 ;f29 = sumR
                fmadds  f3,f24,f1,f3
                fmr     f30,f11                 ;f30 = sumG
                fmadds  f3,f25,f2,f3            ;f3 = n_dot_VP
                fmr     f31,f12                 ;f31 = sumB
                beq     .end                    ;sh == NULL: base colour only

;         if (n_dot_VP>0.0F) {

                fcmpu   f3,f26
                ble     .end

;            sumR += n_dot_VP * lightMatDiffuse[0];
;            sumG += n_dot_VP * lightMatDiffuse[1];
;            sumB += n_dot_VP * lightMatDiffuse[2];
;
;            n_dot_h = nx * light->h_inf_norm[0]
;                    + ny * light->h_inf_norm[1]
;                    + nz * light->h_inf_norm[2];
;
;            (the two computations are interleaved)

                fmadds  f29,f3,f17,f29
                fmuls   f4,f20,f0
                fmadds  f30,f3,f18,f30
                fmadds  f4,f21,f1,f4
                fmadds  f31,f3,f19,f31
                fmadds  f4,f22,f2,f4            ;f4 = n_dot_h

;            if (n_dot_h>0.0F) {

                fcmpu   f4,f26
                ble     .end

;               if (n_dot_h>1.0F) {

                fcmpu   f4,f27
                ble     .cont

;                  GLfloat spec_coef = pow( n_dot_h,
;                                        ctx->Light.Material[side].Shininess );
;                  if (spec_coef>1.0e-10F) {
;                     sumR += spec_coef * light->MatSpecular[side][0];
;                     sumG += spec_coef * light->MatSpecular[side][1];
;                     sumB += spec_coef * light->MatSpecular[side][2];
;                  }
;
;                  (the 1.0e-10 threshold test is not performed here; the
;                  specular term is always added)

                fmr     f1,f4
                fmr     f2,f28
                stfd    f10,40(r1)              ;keep the base colour across
                stfd    f11,48(r1)              ;the call
                stfd    f12,56(r1)
                IFNE    USE_SUFFIX
                bl      _pow__r
                ELSE
                bl      _pow
                ENDC
                frsp    f1,f1                   ;pow returns a double; round it
                                                ;so the single-precision fmadds
                                                ;below get a valid operand
                lfd     f12,56(r1)
                lfd     f11,48(r1)
                lfd     f10,40(r1)
                fmadds  f29,f1,f14,f29
                fmadds  f30,f1,f15,f30
                fmadds  f31,f1,f16,f31
                b       .end
.cont

;                  int k = (int) (n_dot_h * (GLfloat) (SHINE_TABLE_SIZE-1));
;                  struct gl_material *m = &ctx->Light.Material[side];
;                  GLfloat spec_coef;
;                  if (m->ShineTable[k] < 0.0F)
;                  {
;                     m->ShineTable[k] = pow( n_dot_h, m->Shininess );
;                  }
;                  spec_coef = m->ShineTable[k];
;                  sumR += spec_coef * light->MatSpecular[side][0];
;                  sumG += spec_coef * light->MatSpecular[side][1];
;                  sumB += spec_coef * light->MatSpecular[side][2];

                lfs     f0,_f_s(r2)
                fmuls   f5,f0,f4
                fctiwz  f5,f5
                stfd    f5,-8(r1)
                lwz     r0,-4(r1)               ;r0 = k
                slwi    r25,r0,2                ;r25 = byte offset of entry k
                lfsx    f1,r26,r25              ;f1 = ShineTable[k]
                fcmpu   f1,f26
                bge     .skip                   ;entry already computed
                fmr     f1,f4
                fmr     f2,f28
                stfd    f10,40(r1)              ;keep the base colour across
                stfd    f11,48(r1)              ;the call
                stfd    f12,56(r1)
                IFNE    USE_SUFFIX
                bl      _pow__r
                ELSE
                bl      _pow
                ENDC
                frsp    f1,f1                   ;round the double result before
                                                ;it is cached and accumulated
                lfd     f12,56(r1)
                lfd     f11,48(r1)
                lfd     f10,40(r1)
                stfsx   f1,r26,r25              ;ShineTable[k] = spec_coef

.skip
                fmadds  f29,f1,f14,f29
                fmadds  f30,f1,f15,f30
                fmadds  f31,f1,f16,f31
.end

;      FLOAT_COLOR_TO_UBYTE_COLOR( color[j][0], sumR );
;      FLOAT_COLOR_TO_UBYTE_COLOR( color[j][1], sumG );
;      FLOAT_COLOR_TO_UBYTE_COLOR( color[j][2], sumB );
;      color[j][3] = sumA;
;
;      Branch-free clamp to [0,1]: first replace negative sums by 0.0,
;      then replace sums with (sum - 1.0) >= 0 by 1.0.

                fsel    f29,f29,f29,f26
                fsel    f30,f30,f30,f26
                fsel    f31,f31,f31,f26
                lfs     f3,_f_255(r2)
                fsubs   f0,f29,f27
                fsubs   f1,f30,f27
                fsubs   f2,f31,f27
                subic.  r30,r30,1               ;cr0 is consumed by "bne .loop"
                fsel    f29,f0,f27,f29
                fsel    f30,f1,f27,f30
                fsel    f31,f2,f27,f31
                fmuls   f29,f29,f3
                fmuls   f30,f30,f3
                fmuls   f31,f31,f3
                fctiwz  f29,f29
                fctiwz  f30,f30
                fctiwz  f31,f31
                stfd    f29,-8(r1)
                lwz     r3,-4(r1)
                slwi    r3,r3,24                ;red   -> bits 0-7
                stfd    f30,-8(r1)
                lwz     r0,-4(r1)
                rlwimi  r3,r0,16,8,15           ;green -> bits 8-15
                stfd    f31,-8(r1)
                lwz     r0,-4(r1)
                rlwimi  r3,r0,8,16,23           ;blue  -> bits 16-23
                rlwimi  r3,r31,0,24,31          ;alpha -> bits 24-31
                stwu    r3,4(r28)
                bne     .loop

;   Tear down the frame and restore everything saved in the prologue.

                addi    r1,r1,320
                lwz     r23,-8*19-9*4(r1)
                lwz     r24,-8*19-8*4(r1)
                lwz     r25,-8*19-7*4(r1)
                lwz     r26,-8*19-6*4(r1)
                lwz     r27,-8*19-5*4(r1)
                lwz     r28,-8*19-4*4(r1)
                lwz     r29,-8*19-3*4(r1)
                lwz     r30,-8*19-2*4(r1)
                lwz     r31,-8*19-1*4(r1)
                lwz     r0,8(r1)
                lfd    f14,-8*19(r1)
                lfd    f15,-8*18(r1)
                lfd    f16,-8*17(r1)
                lfd    f17,-8*16(r1)
                lfd    f18,-8*15(r1)
                lfd    f19,-8*14(r1)
                lfd    f20,-8*13(r1)
                lfd    f21,-8*12(r1)
                mtlr    r0
                lfd    f22,-8*11(r1)
                lfd    f23,-8*10(r1)
                lfd    f24,-8*9(r1)
                lfd    f25,-8*8(r1)
                lfd    f26,-8*7(r1)
                lfd    f27,-8*6(r1)
                lfd    f28,-8*5(r1)
                lfd    f29,-8*4(r1)
                lfd    f30,-8*3(r1)
                lfd    f31,-8*2(r1)
                blr
  348.  
  349.  
  350.  
  351.                 section data                    ; float constants, loaded with lfs relative to r2
  352. _f_1            dc.s    1.0                     ; upper clamp bound; also used to derive 0.0
  353. _f_255          dc.s    255.0                   ; scale from [0,1] colour to byte range
  354. _f_s            dc.s    SHINE_TABLE_SIZE-1      ; n_dot_h -> ShineTable index scale (assumes the assembler converts the integer expression to a float - TODO confirm)
  355. _f_huge         dc.s    255.0*128.0*65536.0     ; not referenced by the code visible in this listing
  356.  
  357.